library(tidyverse)
library(broom)
library(lubridate)
library(devtools)
library(dsbox)
library(ggridges)
library(tidymodels)
library(openintro)
library(plotly)
# Load the state-level COVID-19 cases-and-deaths time series from the project
# data directory. Column types are left to readr's guesser.
US_deaths_cases <- read_csv(
  file = "/cloud/project/data/United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv"
)
## Rows: 37380 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): submission_date, state, created_at, consent_cases, consent_deaths
## dbl (10): tot_cases, conf_cases, prob_cases, new_case, pnew_case, tot_death,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Quick structural overview of the raw table: one line per column, showing its
# type and the first few values.
dplyr::glimpse(US_deaths_cases)
## Rows: 37,380
## Columns: 15
## $ submission_date <chr> "02/12/2021", "03/01/2021", "08/22/2020", "08/12/2020"…
## $ state <chr> "UT", "CO", "AR", "AS", "AS", "MA", "GA", "NYC", "AS",…
## $ tot_cases <dbl> 359641, 438745, 56199, 0, 0, 704796, 1187107, 948436, …
## $ conf_cases <dbl> 359641, 411869, NA, NA, NA, 659246, 937515, 782257, NA…
## $ prob_cases <dbl> 0, 26876, NA, NA, NA, 45550, 249592, 166179, NA, NA, 4…
## $ new_case <dbl> 1060, 677, 547, 0, 0, 451, 3829, 394, 0, 8835, 2766, 3…
## $ pnew_case <dbl> 0, 60, 0, 0, NA, 46, 1144, 95, 0, 2003, 317, 29, 400, …
## $ tot_death <dbl> 1785, 5952, 674, 0, 0, 17818, 21690, 33203, 0, 19190, …
## $ conf_death <dbl> 1729, 5218, NA, NA, NA, 17458, 18725, 28130, NA, NA, 3…
## $ prob_death <dbl> 56, 734, NA, NA, NA, 360, 2965, 5073, NA, NA, 0, 307, …
## $ new_death <dbl> 11, 1, 11, 0, 0, 5, 7, 6, 0, 66, 3, 15, 10, 69, 0, 5, …
## $ pnew_death <dbl> 2, 0, 0, 0, NA, 0, 0, 0, 0, 2, 0, 1, 2, 0, 0, 0, 0, 0,…
## $ created_at <chr> "02/13/2021 02:50:08 PM", "03/01/2021 12:00:00 AM", "0…
## $ consent_cases <chr> "Agree", "Agree", "Not agree", NA, NA, "Agree", "Agree…
## $ consent_deaths <chr> "Agree", "Agree", "Not agree", NA, NA, "Agree", "Agree…
# Build the table that feeds the state-level map: one row per submission date
# and state, with the non-state jurisdiction codes listed below removed.
#
# The aggregation is keyed on date *and* state because the map downstream is
# keyed by `state`, so every state needs its own value. Grouping by date alone
# would give every state the same nationwide sum (a single-coloured map) and
# would require summarise() to return several rows per group, which
# dplyr >= 1.1.0 deprecates.
#
# summarise() drops only the last grouping level by default, so the result
# stays grouped by `submission_date`.
new_US_deaths_cases <- US_deaths_cases %>%
  filter(!(state %in% c(
    "NYC", "PR", "GU",
    "VI", "MP", "RMI",
    "AS", "PW", "FSM"
  ))) %>%
  group_by(submission_date, state) %>%
  summarise(
    total_new_cases = sum(new_case),
    total_new_deaths = sum(new_death),
    total_cases = sum(tot_cases),
    total_deaths = sum(tot_death)
  )
## `summarise()` has grouped output by 'submission_date'. You can override using the `.groups` argument.
# `submission_date` was read as text in month/day/year form; convert it to a
# real Date so it sorts chronologically rather than alphabetically.
new_US_deaths_cases$submission_date <- as.Date(
  new_US_deaths_cases[["submission_date"]],
  format = "%m/%d/%Y"
)
# Order the table newest-first by submission date, then print it.
new_US_deaths_cases <- arrange(
  new_US_deaths_cases,
  desc(submission_date)
)
new_US_deaths_cases
## # A tibble: 31,773 × 6
## # Groups: submission_date [623]
## submission_date total_new_cases total_new_deaths total_cases total_deaths
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2021-10-05 97795 1931 42579909 666441
## 2 2021-10-05 97795 1931 42579909 666441
## 3 2021-10-05 97795 1931 42579909 666441
## 4 2021-10-05 97795 1931 42579909 666441
## 5 2021-10-05 97795 1931 42579909 666441
## 6 2021-10-05 97795 1931 42579909 666441
## 7 2021-10-05 97795 1931 42579909 666441
## 8 2021-10-05 97795 1931 42579909 666441
## 9 2021-10-05 97795 1931 42579909 666441
## 10 2021-10-05 97795 1931 42579909 666441
## # … with 31,763 more rows, and 1 more variable: state <chr>
# Animated choropleth of `total_new_cases` keyed by state code, with one
# animation frame per `submission_date`. The colour range is pinned from 0 to
# the largest value in the column.
case_graph <- new_US_deaths_cases %>%
  plot_geo(
    locationmode = "USA-states",
    frame = ~submission_date
  ) %>%
  add_trace(
    locations = ~state,
    z = ~total_new_cases,
    zmin = 0,
    zmax = max(new_US_deaths_cases[["total_new_cases"]]),
    color = ~total_new_cases,
    colorscale = "electric"
  )

# Render the widget.
case_graph